// bdls_pathutil.h                                                    -*-C++-*-
#ifndef INCLUDED_BDLS_PATHUTIL
#define INCLUDED_BDLS_PATHUTIL

#include <bsls_ident.h>
BSLS_IDENT("$Id: $")

//@PURPOSE: Provide portable file path manipulation.
//
//@CLASSES:
//  bdls::PathUtil: Portable utility methods for manipulating paths
//
//@SEE_ALSO: bdls_filesystemutil
//
//@DESCRIPTION: This component provides utility methods for manipulating
// strings that represent paths in the filesystem.  Class methods of
// `bdls::PathUtil` include platform-independent operations to add or remove
// filenames or relative paths at the end of a path string (by "filenames" we
// are referring to the names of any filesystem item, including regular files
// and directories).  There are also methods to parse the path to delimit the
// "root" as defined for the current platform; see {Parsing and Performance
// (`rootEnd` argument)} below.
//
// Paths that have a root are called *absolute* paths, whereas paths that do
// not have a root are *relative* paths.
//
// Note that this component does not perform filesystem operations.  In
// particular, no effort is made to verify the existence or accessibility of
// any segment of any path.
//
///Terminology
///-----------
// To introduce the terminology explored in this section, let's start with a
// Unix example:
// ```
// "/foo/bar/myfile.txt"
// ```
// The elements of this path would be:
// ```
//           Path: "/foo/bar/myfile.txt"
//           Root: "/"                       # the starting separator(s)
// Leaf(Basename): "myfile.txt"
//      Extension: ".txt"
//        Dirname: "/foo/bar/"
// ```
//
///Separator
///- - - - -
// A platform dependent character that separates elements of a path, such as
// directory names from each other and file names.  The separator character is
// the `/` (slash) on Unix (and the like) systems and `\` (backslash) on
// Windows systems.
//
///Path
/// - -
// An optional root, followed by optional directories, followed by an optional
// filename.
//
///Root
/// - -
// The root, if present, is at the beginning of a path and its presence
// determines if a path is absolute (the root is present) or relative (the root
// is not present).  The textual rules for what a root is are platform
// dependent.  See [](#Unix Root) and [](#Windows Root).
//
// See also [](#Parsing and Performance) for important notes about speeding up
// functions (especially on Windows) by not reparsing roots every time a
// function is called.
//
///Unix Root
///  -  -  -
// The Unix root consists of the separator characters at the beginning of a
// path, so the root of "/one" is "/", the root of "//two" is "//", while the
// root of "somefile" is "" (there is no root, relative path).
//
///Windows Root
///  -  -  -  -
// The Windows root is much more complicated than the Unix root, because
// Windows has three different flavors of paths: local (LFS), UNC, and Long
// UNC (LUNC):
//
//: LFS: root consists of a drive letter followed by a colon (the name part)
//:      and then zero or more separators (the directory part).  E.g.,
//:      "c:\hello.txt", root is "c:\"; "c:tmp" root is "c:"
//:
//: UNC: root consists of two separators followed by a hostname and
//:      separator (the name part), and then a shared folder followed by one
//:      or more separators (the directory part).  e.g.,
//:      "\\servername\sharefolder\output\test.t" root is
//:      "\\servername\sharefolder\"
//:
//: LUNC: root starts with "\\?\".  Then follows either "UNC" followed by
//:       a UNC root, or an LFS root.  The "\\?\" is included as part of
//:       the root name.  e.g.,
//:      "\\?\UNC\servername\folder\hello" root is "\\?\UNC\servername\folder\"
//:      while "\\?\c:\windows\test" root is "\\?\c:\"
//
///Leaf (a.k.a. Basename)
/// - - - - - - - - - - -
// The leaf is the rightmost name following the root, in other words: the last
// element of the path.  Note that several methods in this utility require a
// leaf to be present to function (such as `getDirname`).  Note that a relative
// path may contain a leaf only.  Examples:
// ```
// Path                            Leaf
// ----                            ----
// "/tmp/foo/bar.txt"              "bar.txt"
// "c:\tmp\foo\bar.txt"            "bar.txt"
// "\\server\share\tmp\foo.txt"    "foo.txt"
// "/tmp/foo/"                     "foo"
// "/tmp/"                         "tmp"
// "/"                             Not Present
// ```
//
///Extension
///- - - - - -
// An extension is a suffix of a leaf that begins with a dot and that does
// not contain additional dots. There are a few caveats. The special leaf
// names "." and ".." are considered to not have extensions. Furthermore,
// if a leaf's name begins with a dot, such dot is not considered when
// determining the extension. For example, the leaf ".bashrc" does not have
// an extension, but ".bbprofile.log" does, and its extension is ".log".
// We will say that a path has an extension if it has a leaf and its leaf
// has an extension. Note that for consistency reasons, our implementation
// differs from other standard implementations in the same way `getLeaf`
// does: the path "/foo/bar.txt/" is considered to have an extension and
// its extension is ".txt". Examples:
// ```
// Path                            Extension
// ----                            -------
// "/tmp/foo/bar.txt"              ".txt"
// "/tmp/foo/bar"                  Not Present
// "/tmp/foo/bar.longextension"    ".longextension"
// "/a/b.txt/"                     ".txt"
// "/a/b.txt/."                    Not present
// "/a.txt/b.txt/.."               Not present
// "/a/.profile"                   Not present
// "/a/.profile.backup"            ".backup"
// "foo.txt"                       ".txt"
// ```
//
///Dirname
///- - - -
// Dirname is the part of the path that contains the root but not the leaf.
// Note that the `getDirname` utility method requires a leaf to be present to
// function.  Examples:
// ```
// Path                            Dirname
// ----                            -------
// "/tmp/foo/bar.txt"              "/tmp/foo/"
// "c:\tmp\foo\bar.txt"            "c:\tmp\foo\"
// "\\server\share\tmp\foo.txt"    "\\server\share\tmp\"
// "/tmp/foo/"                     "/tmp"
// "/tmp/"                         "/"
// "/"                             no leaf -> error
// "foo.txt"                       empty
// ```
//
///Parsing and Performance
///-----------------------
// Most methods of this component will perform basic parsing of the beginning
// part of the path to determine what part of it is the "root" as defined for
// the current platform.  This parsing is trivial on Unix platforms but is
// slightly more involved for the Windows operating system.  To accommodate
// client code which is willing to store parsing results in order to maximize
// performance, all methods which parse the "root" of the path accept an
// optional argument delimiting the "root"; if this argument is specified,
// parsing is skipped.
//
///Usage
///-----
// This section illustrates intended use of this component.
//
///Example 1: Basic Syntax
///- - - - - - - - - - - -
// We start with strings representing an absolute native path and a relative
// native path, respectively:
// ```
// #ifdef BSLS_PLATFORM_OS_WINDOWS
// bsl::string tempPath  = "c:\\windows\\temp";
// bsl::string otherPath = "22jan08\\log.txt";
// #else
// bsl::string tempPath  = "/var/tmp";
// bsl::string otherPath = "22jan08/log.txt";
// #endif
// ```
// `tempPath` is an absolute path, since it has a root.  It also has a leaf
// element ("temp"):
// ```
// assert(false == bdls::PathUtil::isRelative(tempPath));
// assert(true  == bdls::PathUtil::isAbsolute(tempPath));
// assert(true  == bdls::PathUtil::hasLeaf(tempPath));
// ```
// We can add filenames to the path one at a time, or we can add another path
// if it is relative.  We can also remove filenames from the end of the path
// one at a time:
// ```
// bdls::PathUtil::appendRaw(&tempPath, "myApp");
// bdls::PathUtil::appendRaw(&tempPath, "logs");
//
// assert(true == bdls::PathUtil::isRelative(otherPath));
// assert(0    == bdls::PathUtil::appendIfValid(&tempPath, otherPath));
// assert(true == bdls::PathUtil::hasLeaf(tempPath));
//
// bdls::PathUtil::popLeaf(&tempPath);
// bdls::PathUtil::appendRaw(&tempPath, "log2.txt");
//
// #ifdef BSLS_PLATFORM_OS_WINDOWS
// assert("c:\\windows\\temp\\myApp\\logs\\22jan08\\log2.txt" == tempPath);
// #else
// assert("/var/tmp/myApp/logs/22jan08/log2.txt"              == tempPath);
// #endif
// ```
// A relative path may be appended to any other path, even itself.  An absolute
// path may not be appended to any path, or undefined behavior will result:
// ```
// assert(0 == bdls::PathUtil::appendIfValid(&otherPath, otherPath));  // OK
// /* bdls::PathUtil::appendRaw(&otherPath, tempPath.c_str()); */
//                                                     // UNDEFINED BEHAVIOR!
// ```
// Note that there is no attempt to distinguish filenames that are regular
// files from filenames that are directories, or to verify the existence of
// paths in the filesystem.
// ```
// #ifdef BSLS_PLATFORM_OS_WINDOWS
// assert("c:\\windows\\temp\\myApp\\logs\\22jan08\\log2.txt" == tempPath);
// #else
// assert("/var/tmp/myApp/logs/22jan08/log2.txt"              == tempPath);
// #endif
// ```
//
///Example 2: Parsing a path using `splitFilename`
///- - - - - - - - - - - - - - - - - - - - - - - -
// Suppose we need to obtain all filenames from the path.
//
// First, we create a path for splitting and a storage for filenames:
// ```
// #ifdef BSLS_PLATFORM_OS_WINDOWS
// const char                     *splitPath = "c:\\one\\two\\three\\four";
// #else
// const char                     *splitPath = "//one/two/three/four";
// #endif
// bsl::vector<bsl::string_view>  filenames;
// ```
// Then, we run a cycle to sever filenames from the end one by one:
// ```
// bsl::string_view head;
// bsl::string_view tail;
// bsl::string_view path(splitPath);
//
// do {
//     bdls::PathUtil::splitFilename(&head, &tail, path);
//     filenames.push_back(tail);
//     path = head;
// } while (!tail.empty());
// ```
// Now, verify the resulting values:
// ```
// assert(5           == filenames.size());
//
// assert("four"      == filenames[0]);
// assert("three"     == filenames[1]);
// assert("two"       == filenames[2]);
// assert("one"       == filenames[3]);
// assert(""          == filenames[4]);
// ```
// Finally, make sure that only the root remains of the original value:
// ```
// #ifdef BSLS_PLATFORM_OS_WINDOWS
// assert("c:\\"      == head);
// #else
// assert("//"        == head);
// #endif
// ```

#include <bdlscm_version.h>

#include <bsls_assert.h>
#include <bsls_libraryfeatures.h>
#include <bsls_platform.h>
#include <bsls_review.h>

#include <bsl_string.h>
#include <bsl_string_view.h>

#include <string>           // 'std::string', 'std::pmr::string'

namespace BloombergLP {
namespace bdls {

                              // ===============
                              // struct PathUtil
                              // ===============

/// This struct contains utility methods for platform-independent manipulation
/// of filesystem paths.  No method of this struct provides any filesystem
/// operations or accesses the filesystem as part of its implementation.
/// Methods that load or modify a string are overloaded for `bsl::string`,
/// `std::string`, and (where `BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING` is
/// defined) `std::pmr::string`; the contract preceding each overload set
/// applies to every overload in that set.
struct PathUtil {

    // PUBLIC CLASS DATA
    static const char k_SEPARATOR;
                          // character used as a preferred path separator; use
                          // of this constant is strongly discouraged (as
                          // platforms, like Windows, may support multiple
                          // separators in different contexts), instead prefer
                          // functions to join and split path strings

    // CLASS METHODS

    /// Append the specified `filename` to the end of the specified `path`
    /// if `filename` represents a relative path.  Return 0 on success, and
    /// a non-zero value otherwise.  Note that any filesystem separator
    /// characters at the end of `filename` or `path` will be discarded.
    /// See [](#Terminology) for the definition of separator.
    static int appendIfValid(bsl::string             *path,
                             const bsl::string_view&  filename);
    static int appendIfValid(std::string             *path,
                             const bsl::string_view&  filename);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING
    static int appendIfValid(std::pmr::string        *path,
                             const bsl::string_view&  filename);
#endif

    /// Append the specified `filename` up to the optionally specified `length`
    /// to the end of the specified `path`.  If `length` is negative, append
    /// the entire string.  If the optionally specified `rootEnd` offset is
    /// non-negative, it is taken as the position in `path` of the character
    /// following the root.  The behavior is undefined if `filename` represents
    /// an absolute path or if either `filename` or `path` ends with the
    /// filesystem separator character.  The behavior is also undefined if
    /// `filename` points to any part of `path` (i.e., `filename` may not be an
    /// alias for `path`).  See [](#Parsing and Performance).
    static void appendRaw(bsl::string *path,
                          const char  *filename,
                          int          length  = -1,
                          int          rootEnd = -1);
    static void appendRaw(std::string *path,
                          const char  *filename,
                          int          length  = -1,
                          int          rootEnd = -1);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING
    static void appendRaw(std::pmr::string *path,
                          const char       *filename,
                          int               length  = -1,
                          int               rootEnd = -1);
#endif

    /// Remove from the specified `path` the rightmost filename following the
    /// root; that is, remove the leaf element.  If the optionally specified
    /// `rootEnd` offset is non-negative, it is taken as the position in `path`
    /// of the character following the root.  Return 0 on success, and a
    /// nonzero value otherwise; in particular, return a nonzero value if
    /// `path` does not have a leaf.  See [](#Parsing and Performance).  See
    /// also [](#Terminology) for the definition of leaf and root.
    static int popLeaf(bsl::string *path, int rootEnd = -1);
    static int popLeaf(std::string *path, int rootEnd = -1);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING
    static int popLeaf(std::pmr::string *path, int rootEnd = -1);
#endif

    /// Load into the specified `leaf` the value of the rightmost name in the
    /// specified `path` that follows the root; that is, the leaf element.  If
    /// the optionally specified `rootEnd` offset is non-negative, it is taken
    /// as the position in `path` of the character following the root.  Return
    /// 0 on success, and a non-zero value otherwise; in particular, return
    /// nonzero if `path` does not have a leaf.  Note that `getBasename` is a
    /// synonym for `getLeaf`.  See [](#Parsing and Performance).  See also
    /// [](#Terminology) for the definition of leaf and root.
    static int getBasename(bsl::string             *leaf,
                           const bsl::string_view&  path,
                           int                      rootEnd = -1);
    static int getBasename(std::string             *leaf,
                           const bsl::string_view&  path,
                           int                      rootEnd = -1);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING
    static int getBasename(std::pmr::string        *leaf,
                           const bsl::string_view&  path,
                           int                      rootEnd = -1);
#endif

    /// Load into the specified `dirname` the value of the directory part of
    /// the specified `path`, that is, the root if it exists and all the
    /// filenames except the last one (the leaf).  If the optionally specified
    /// `rootEnd` offset is non-negative, it is taken as the position in `path`
    /// of the character following the root.  Return 0 on success, and a
    /// non-zero value otherwise; in particular, return a nonzero value if
    /// `path` does not have a leaf.  Note that in the case of a relative path
    /// with a single filename, the function will succeed and `dirname` will be
    /// the empty string.  See [](#Parsing and Performance).  See also
    /// [](#Terminology) for the definition of directories and root.
    static int getDirname(bsl::string             *dirname,
                          const bsl::string_view&  path,
                          int                      rootEnd = -1);
    static int getDirname(std::string             *dirname,
                          const bsl::string_view&  path,
                          int                      rootEnd = -1);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING
    static int getDirname(std::pmr::string        *dirname,
                          const bsl::string_view&  path,
                          int                      rootEnd = -1);
#endif

    /// Load into the specified `leaf` the value of the rightmost name in the
    /// specified `path` that follows the root; that is, the leaf element.  If
    /// the optionally specified `rootEnd` offset is non-negative, it is taken
    /// as the position in `path` of the character following the root.  Return
    /// 0 on success, and a non-zero value otherwise; in particular, return
    /// nonzero if `path` does not have a leaf.  Note that `getBasename` is a
    /// synonym for `getLeaf`.  See [](#Parsing and Performance).  See also
    /// [](#Terminology) for the definition of leaf and root.
    static int getLeaf(bsl::string             *leaf,
                       const bsl::string_view&  path,
                       int                      rootEnd = -1);
    static int getLeaf(std::string             *leaf,
                       const bsl::string_view&  path,
                       int                      rootEnd = -1);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING
    static int getLeaf(std::pmr::string        *leaf,
                       const bsl::string_view&  path,
                       int                      rootEnd = -1);
#endif

    /// Load into the specified `extension` the extension of the specified
    /// `path`.  If the optionally specified `rootEnd` offset is non-negative,
    /// it is taken as the position in `path` of the character following the
    /// root.  Return 0 if the path has an extension, and a non-zero value
    /// otherwise.  See [](#Parsing and Performance).  See also
    /// [](#Terminology) for the definition of extension, leaf, and root.
    static int getExtension(bsl::string             *extension,
                            const bsl::string_view&  path,
                            int                      rootEnd = -1);
    static int getExtension(std::string             *extension,
                            const bsl::string_view&  path,
                            int                      rootEnd = -1);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING
    static int getExtension(std::pmr::string        *extension,
                            const bsl::string_view&  path,
                            int                      rootEnd = -1);
#endif

    /// Load into the specified `root` the value of the root part of the
    /// specified `path`.  If the optionally specified `rootEnd` offset is
    /// non-negative, it is taken as the position in `path` of the character
    /// following the root.  Return 0 on success, and a non-zero value
    /// otherwise; in particular, return a nonzero value if `path` is relative.
    /// Note that the meaning of the root part is platform-dependent.  See
    /// [](#Parsing and Performance).  See also [](#Terminology) for the
    /// definition of root.
    static int getRoot(bsl::string             *root,
                       const bsl::string_view&  path,
                       int                      rootEnd = -1);
    static int getRoot(std::string             *root,
                       const bsl::string_view&  path,
                       int                      rootEnd = -1);
#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING
    static int getRoot(std::pmr::string        *root,
                       const bsl::string_view&  path,
                       int                      rootEnd = -1);
#endif

    /// Load the last pathname component from the specified `path` into the
    /// specified `tail` and everything leading up to that to the specified
    /// `head`.  If the optionally specified `rootEnd` offset is
    /// non-negative, it is taken as the position in `path` of the character
    /// following the root.  The `tail` part never contains a slash; if
    /// `path` ends in a slash, `tail` is empty.  If there is no slash in
    /// `path`, `head` is empty.  If `path` is empty, both `head` and `tail`
    /// are empty.  Trailing slashes are stripped from `head` unless it is
    /// the root.
    /// ```
    /// +------------------+-------------+---------+
    /// |      PATH        |    HEAD     |   TAIL  |
    /// +==================+=============+=========+
    /// | "one"            | ""          | "one"   |
    /// +------------------+-------------+---------+
    /// | "/one/two/three" | "/one/two"  | "three" |
    /// +------------------+-------------+---------+
    /// | "//one/two///"   | "//one/two" | ""      |
    /// +------------------+-------------+---------+
    /// | "c:\\one\\two"   | "c:\\one"   | "two"   |
    /// +------------------+-------------+---------+
    /// ```
    /// See [](#Terminology) for the definition of root.  The behavior is
    /// undefined unless `head != tail` and `INT_MAX >= path.length()`.
    /// Note that `head` or `tail` may point to the `path` object when the
    /// method is called.
    static void splitFilename(bsl::string_view        *head,
                              bsl::string_view        *tail,
                              const bsl::string_view&  path,
                              int                      rootEnd = -1);

    /// Return `true` if the specified `path` is absolute (has a root), and
    /// `false` otherwise.  If the optionally specified `rootEnd` offset is
    /// non-negative, it is taken as the position in `path` of the character
    /// following the root.  See [](#Parsing and Performance).  See also
    /// [](#Terminology) for the definition of root.
    static bool isAbsolute(const bsl::string_view& path, int rootEnd = -1);

    /// Return `true` if the specified `path` is relative (lacks a root), and
    /// `false` otherwise.  If the optionally specified `rootEnd` offset is
    /// non-negative, it is taken as the position in `path` of the character
    /// following the root.  See [](#Parsing and Performance).  See also
    /// [](#Terminology) for the definition of root.
    static bool isRelative(const bsl::string_view& path, int rootEnd = -1);

    /// Return `true` if the specified `path` has a filename following the
    /// root, and `false` otherwise.  If the optionally specified `rootEnd`
    /// offset is non-negative, it is taken as the position in `path` of the
    /// character following the root.  See [](#Parsing and Performance).  See
    /// also [](#Terminology) for the definition of root.
    static bool hasLeaf(const bsl::string_view& path, int rootEnd = -1);

    /// Return the 0-based position in the specified `path` of the character
    /// following the root.  Note that a return value of 0 indicates a relative
    /// path.  See [](#Parsing and Performance).  See also [](#Terminology) for
    /// the definition of root.
    static int getRootEnd(const bsl::string_view& path);
};

// ============================================================================
//                            INLINE DEFINITIONS
// ============================================================================

                              // ---------------
                              // struct PathUtil
                              // ---------------

// CLASS METHODS
inline
int PathUtil::getBasename(bsl::string             *leaf,
                          const bsl::string_view&  path,
                          int                      rootEnd)
{
    // The output string address must be non-null.
    BSLS_ASSERT(leaf);

    // `getBasename` is a synonym for `getLeaf`: forward every argument
    // unchanged and hand the resulting status back to the caller.
    const int status = PathUtil::getLeaf(leaf, path, rootEnd);
    return status;
}

inline
int PathUtil::getBasename(std::string             *leaf,
                          const bsl::string_view&  path,
                          int                      rootEnd)
{
    // The output string address must be non-null.
    BSLS_ASSERT(leaf);

    // `getBasename` is a synonym for `getLeaf`: forward every argument
    // unchanged and hand the resulting status back to the caller.
    const int status = PathUtil::getLeaf(leaf, path, rootEnd);
    return status;
}

#ifdef BSLS_LIBRARYFEATURES_HAS_CPP17_PMR_STRING
inline
int PathUtil::getBasename(std::pmr::string        *leaf,
                          const bsl::string_view&  path,
                          int                      rootEnd)
{
    // The output string address must be non-null.
    BSLS_ASSERT(leaf);

    // `getBasename` is a synonym for `getLeaf`: forward every argument
    // unchanged and hand the resulting status back to the caller.
    const int status = PathUtil::getLeaf(leaf, path, rootEnd);
    return status;
}
#endif

}  // close package namespace
}  // close enterprise namespace

#endif

// ----------------------------------------------------------------------------
// Copyright 2015 Bloomberg Finance L.P.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------- END-OF-FILE ----------------------------------
